package com.study.bigdata.spark.core.wc

import org.apache.spark.{SparkConf, SparkContext}

object Scala02_WordCount {
  def main(args: Array[String]): Unit = {
    // TODO: word-count application built on Spark Core

    // 1. Add the Spark dependency to the project build
    // 2. Establish the Spark connection (SparkConf + SparkContext)
    val conf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(conf)

    // 3. Read the input file as an RDD of lines
    val lines = sc.textFile("output/test.txt")
    // 4. Split each line into words, then pair every word with a count of 1
    val words = lines.flatMap(_.split(" "))
    val wordToOne = words.map((_, 1))
    // 5. Group the (word, 1) tuples by word
    val wordGroup = wordToOne.groupBy(_._1)
    // 6. Aggregate: within each group, reduce the tuples by summing their counts
    val wordCount = wordGroup.mapValues(
      list => list.reduce((t1, t2) => (t1._1, t1._2 + t2._2))
    )
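
    // Alternative sketch (our addition, not part of the original numbered steps):
    // reduceByKey collapses the grouping (step 5) and summation (step 6) into
    // a single shuffle, summing the counts per word directly. The variable
    // name wordCountByKey is introduced here purely for illustration.
    val wordCountByKey = wordToOne.reduceByKey(_ + _)
    // wordCountByKey.collect().foreach(println)  // would print the same counts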

    // 7. Print the word counts to the console
    wordCount.collect().foreach(println)
    sc.stop()

  }

}
